1 Data check

1.1 Load packages

library("dplyr")
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library("tidyr")
library("ggplot2")
library("stringr")
library("tigerstats")
## Loading required package: abd
## Loading required package: nlme
## 
## Attaching package: 'nlme'
## The following object is masked from 'package:dplyr':
## 
##     collapse
## Loading required package: lattice
## Loading required package: grid
## Loading required package: mosaic
## Loading required package: ggformula
## Loading required package: ggstance
## 
## Attaching package: 'ggstance'
## The following objects are masked from 'package:ggplot2':
## 
##     geom_errorbarh, GeomErrorbarh
## 
## New to ggformula?  Try the tutorials: 
##  learnr::run_tutorial("introduction", package = "ggformula")
##  learnr::run_tutorial("refining", package = "ggformula")
## Loading required package: mosaicData
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## Registered S3 method overwritten by 'mosaic':
##   method                           from   
##   fortify.SpatialPolygonsDataFrame ggplot2
## 
## The 'mosaic' package masks several functions from core packages in order to add 
## additional features.  The original behavior of these functions should not be affected by this.
## 
## Note: If you use the Matrix package, be sure to load it BEFORE loading mosaic.
## 
## Have you tried the ggformula package for your plots?
## 
## Attaching package: 'mosaic'
## The following object is masked from 'package:Matrix':
## 
##     mean
## The following object is masked from 'package:ggplot2':
## 
##     stat
## The following objects are masked from 'package:dplyr':
## 
##     count, do, tally
## The following objects are masked from 'package:stats':
## 
##     binom.test, cor, cor.test, cov, fivenum, IQR, median,
##     prop.test, quantile, sd, t.test, var
## The following objects are masked from 'package:base':
## 
##     max, mean, min, prod, range, sample, sum
## Welcome to tigerstats!
## To learn more about this package, consult its website:
##  http://homerhanumat.github.io/tigerstats

1.2 Read data

# Read the comma-separated data file (header row expected) into df, then
# restore the objects saved in the speech-rate .Rda file.
# NOTE(review): the .Rda file presumably provides df_sr, which is used in the
# merge step that follows - confirm.
df <- read.csv(file = "../data/c-center_28th_Aug.csv")
load(file = "../data/speech_rate.Rda")

1.3 Merge mview output with speech rate dataframe

# right_join() keeps every row of df_sr and attaches the df columns that match
# on speaker and file name; df_sr is removed once merged.
df <- right_join(df, df_sr, by = c("subj", "fname"))
rm(df_sr)

1.4 Tidy the data

# Split the file name on "_" into its seven fields, then build a trial id by
# concatenating trial number and repetition.
df <- separate(
  df,
  fname,
  into = c("project", "prompt", "block", "remove", "remove2",
           "trialno", "repetition"),
  sep = "_"
)
df <- mutate(df, trial = str_c(trialno, repetition))

Drop unwanted columns.

# Remove the file-name leftovers, the x/y/z/v 1-7 columns, the X.* columns
# and the D* columns.
df <- select(
  df,
  -c(
    project, remove, remove2,
    x1, x2, x3, x4, x5, x6, x7,
    y1, y2, y3, y4, y5, y6, y7,
    z1, z2, z3, z4, z5, z6, z7,
    v1, v2, v3, v4, v5, v6, v7,
    X.1, X.2, X.3,
    DOns, DTarg, DOff
  )
)

Create new variable recording.no to indicate the recording number (first recording, second recording etc..).

# Number each speaker's distinct trials 1, 2, 3, ... in ascending `trial`
# order. `trial` is a string, so the order is alphabetical.
# The argument is spelled `.by_group`; the former `by_group = TRUE` was taken
# as an extra (constant) sort key rather than as the grouping switch.
trialno <- df %>%
  group_by(subj) %>%
  distinct(trial) %>%
  arrange(trial, .by_group = TRUE) %>%
  mutate(recording.no = row_number()) %>%
  select(recording.no, everything()) %>%
  arrange(subj, recording.no)

Bind trialno with dataframe.

# Attach recording.no to every row of its (subj, trial) pair, move it to the
# front and sort by speaker and recording number.
df <- inner_join(df, trialno, by = c("subj", "trial"))
df <- select(df, recording.no, everything())
df <- arrange(df, subj, recording.no)
rm(trialno)

Create group variable.

# Speakers whose id contains "CTRL" are controls; everyone else is labelled PD.
is_control <- str_detect(df$subj, "CTRL")
df$group <- as.factor(ifelse(is_control, "CTRL", "PD"))
rm(is_control)

Create condition based on prompt.

# Condition defaults to "C"; four prompts are recoded to "CC" and
# "oma sprak" to "CCC".
df$condition <- "C"
df$condition[df$prompt %in% c("oma spat", "opa Smat", "opa schat", "oma's prak")] <- "CC"
df$condition[df$prompt %in% "oma sprak"] <- "CCC"
df$condition <- as.factor(df$condition)

Create cluster column.

# Each pair of prompts maps onto one cluster label; rows with any other
# prompt are not assigned a label here.
df$cluster[df$prompt %in% c("oma spat", "oma's pad")] <- "sp"
df$cluster[df$prompt %in% c("opa schat", "opa's gat")] <- "sx"
df$cluster[df$prompt %in% c("opa Smat", "opa's mat")] <- "sm"
df$cluster[df$prompt %in% c("oma sprak", "oma's prak")] <- "spr"
df$cluster <- as.factor(df$cluster)
levels(df$cluster)
## [1] "sm"  "sp"  "spr" "sx"

Remove faulty segmentation, based on outlier column.

# Keep only the rows whose `outl` value is NA.
# NOTE(review): presumably a non-NA value marks a faulty segmentation, and
# `df$outl` may rely on `$` partial matching of a longer column name - confirm.
df <- df[is.na(df$outl),]

Check whether the conditions are well coded and whether the segmentation involves the correct articulators.

# Cross-tabulate prompt against condition: each prompt should fall in
# exactly one condition column.
table(df$prompt, df$condition)
##             
##                 C   CC  CCC
##   oma spat      0 1815    0
##   oma sprak     0    0 1685
##   oma's pad  1798    0    0
##   oma's prak    0 1706    0
##   opa schat     0 1696    0
##   opa Smat      0 1790    0
##   opa's gat  1697    0    0
##   opa's mat  1762    0    0
# Cross-tabulate prompt against cluster: each prompt should fall in exactly
# one cluster column.
table(df$prompt, df$cluster)
##             
##                sm   sp  spr   sx
##   oma spat      0 1815    0    0
##   oma sprak     0    0 1685    0
##   oma's pad     0 1798    0    0
##   oma's prak    0    0 1706    0
##   opa schat     0    0    0 1696
##   opa Smat   1790    0    0    0
##   opa's gat     0    0    0 1697
##   opa's mat  1762    0    0    0
# Cross-tabulate segment against trajectory to see which trajectory each
# segment was labelled on.
table(df$seg, df$traj)
##    
##            LA TB_X TB_XZ TB_Z TT_X TT_XZ TT_Z
##   É‘   93    0   59  2154  332    0     0    0
##   k   44    0    0   159  683    0     0    0
##   m   23  869    0     0    0    0     0    0
##   p   52 1750    0     0    0    0     0    0
##   r   79    0   18    57   31    0   285  263
##   s  109    0    0     0    0    0  3366   60
##   t   67    0    0     0    0   58  2349  207
##   X   37    0  142   298  305    0     0    0

Remove unreliable /sx/ segmentation of PD25.

# Drop the /sx/ rows of speaker PD25.
# %in% never returns NA, so a row with a missing subj or cluster is kept
# unchanged; with `==` an NA in the index is returned as an all-NA row.
df <- df[!(df$subj %in% "PD25" & df$cluster %in% "sx"), ]

2 Preparing data for rightedge analysis

Here we compute the durations between the offset of the vowel's preceding consonant and the onset of the vowel's following consonant (i.e. the anchor).

/sp/ environment

# Right-edge lag for /sp/ ("oma spat", "oma's pad"): per recording, the time
# from landmark t5 of /p/ to landmark t3 of the coda /t/ (the anchor).

# t3 of the word-final /t/.
# The original chained group_by(subj) and group_by(recording.no); the second
# call replaces the first, so only the recording.no grouping is kept here.
onset.t <- df %>%
  filter(prompt %in% c("oma spat", "oma's pad"), seg == "t") %>%
  group_by(recording.no) %>%
  select(subj, recording.no, prompt, block, condition, seg, group, cluster,
         duration.tt, duration.mt, t3.t = t3)

# t5 of /p/, paired with the /t/ row of the same recording. time.lag is the
# raw difference t3.t - t5.p; no time normalisation is applied.
# drop_na(time.lag) removes only the rows without a lag. na.omit() on a data
# frame has no `cols` argument, so the earlier call dropped every row with an
# NA in any column.
sp <- df %>%
  filter(prompt %in% c("oma spat", "oma's pad"), seg == "p") %>%
  group_by(recording.no) %>%
  select(subj, recording.no, prompt, block, condition, seg, group, cluster,
         duration.tt, duration.mt, t5.p = t5) %>%
  inner_join(onset.t, by = c("subj", "recording.no", "prompt", "block",
                             "condition", "group", "cluster",
                             "duration.tt", "duration.mt")) %>%
  mutate(time.lag = t3.t - t5.p) %>%
  drop_na(time.lag)

rm(onset.t)

# Per-speaker boxplots of the lag, one figure per group.
ggplot(sp[sp$group == "CTRL", ], aes(x = subj, y = time.lag, fill = condition)) +
  geom_boxplot() +
  labs(title = "CTRL: /sp/", y = "Distance to anchor (ms)")

ggplot(sp[sp$group == "PD", ], aes(x = subj, y = time.lag, fill = condition)) +
  geom_boxplot() +
  labs(title = "PD: /sp/", y = "Distance to anchor (ms)")

/sm/ environment

# Right-edge lag for /sm/ ("opa Smat", "opa's mat"): per recording, the time
# from landmark t5 of /m/ to landmark t3 of the coda /t/ (the anchor).

# t3 of the word-final /t/.
# The original chained group_by(subj) and group_by(recording.no); the second
# call replaces the first, so only the recording.no grouping is kept here.
onset.t <- df %>%
  filter(prompt %in% c("opa Smat", "opa's mat"), seg == "t") %>%
  group_by(recording.no) %>%
  select(subj, recording.no, prompt, block, condition, seg, group, cluster,
         duration.tt, duration.mt, t3.t = t3)

# t5 of /m/, paired with the /t/ row of the same recording. time.lag is the
# raw difference t3.t - t5.m; no time normalisation is applied.
# drop_na(time.lag) removes only the rows without a lag. na.omit() on a data
# frame has no `cols` argument, so the earlier call dropped every row with an
# NA in any column.
sm <- df %>%
  filter(prompt %in% c("opa Smat", "opa's mat"), seg == "m") %>%
  group_by(recording.no) %>%
  select(subj, recording.no, prompt, block, condition, seg, group, cluster,
         duration.tt, duration.mt, t5.m = t5) %>%
  inner_join(onset.t, by = c("subj", "recording.no", "prompt", "block",
                             "condition", "group", "cluster",
                             "duration.tt", "duration.mt")) %>%
  mutate(time.lag = t3.t - t5.m) %>%
  drop_na(time.lag)

rm(onset.t)

# Per-speaker boxplots of the lag, one figure per group.
ggplot(sm[sm$group == "CTRL", ], aes(x = subj, y = time.lag, fill = condition)) +
  geom_boxplot() +
  labs(title = "CTRL: /sm/", y = "Distance to anchor (ms)")

ggplot(sm[sm$group == "PD", ], aes(x = subj, y = time.lag, fill = condition)) +
  geom_boxplot() +
  labs(title = "PD: /sm/", y = "Distance to anchor (ms)")

/sx/ environment

# Right-edge lag for /sx/ ("opa schat", "opa's gat"): per recording, the time
# from landmark t5 of the segment coded "X" to landmark t3 of the coda /t/
# (the anchor).

# t3 of the word-final /t/.
# The original chained group_by(subj) and group_by(recording.no); the second
# call replaces the first, so only the recording.no grouping is kept here.
onset.t <- df %>%
  filter(prompt %in% c("opa schat", "opa's gat"), seg == "t") %>%
  group_by(recording.no) %>%
  select(subj, recording.no, prompt, block, condition, seg, group, cluster,
         duration.tt, duration.mt, t3.t = t3)

# t5 of /x/, paired with the /t/ row of the same recording. time.lag is the
# raw difference t3.t - t5.x; no time normalisation is applied.
# drop_na(time.lag) removes only the rows without a lag. na.omit() on a data
# frame has no `cols` argument, so the earlier call dropped every row with an
# NA in any column.
sx <- df %>%
  filter(prompt %in% c("opa schat", "opa's gat"), seg == "X") %>%
  group_by(recording.no) %>%
  select(subj, recording.no, prompt, block, condition, seg, group, cluster,
         duration.tt, duration.mt, t5.x = t5) %>%
  inner_join(onset.t, by = c("subj", "recording.no", "prompt", "block",
                             "condition", "group", "cluster",
                             "duration.tt", "duration.mt")) %>%
  mutate(time.lag = t3.t - t5.x) %>%
  drop_na(time.lag)

rm(onset.t)

# Per-speaker boxplots of the lag, one figure per group.
ggplot(sx[sx$group == "CTRL", ], aes(x = subj, y = time.lag, fill = condition)) +
  geom_boxplot() +
  labs(title = "CTRL: /sx/", y = "Distance to anchor (ms)")

ggplot(sx[sx$group == "PD", ], aes(x = subj, y = time.lag, fill = condition)) +
  geom_boxplot() +
  labs(title = "PD: /sx/", y = "Distance to anchor (ms)")

/spr/ environment

# Right-edge lag for /spr/ ("oma sprak", "oma's prak"): per recording, the
# time from landmark t5 of /r/ to landmark t3 of the coda /k/ (the anchor).

# t3 of the word-final /k/.
# The original chained group_by(subj) and group_by(recording.no); the second
# call replaces the first, so only the recording.no grouping is kept here.
onset.k <- df %>%
  filter(prompt %in% c("oma sprak", "oma's prak"), seg == "k") %>%
  group_by(recording.no) %>%
  select(subj, recording.no, prompt, block, condition, seg, group, cluster,
         duration.tt, duration.mt, t3.k = t3)

# t5 of /r/, paired with the /k/ row of the same recording. time.lag is the
# raw difference t3.k - t5.r; no time normalisation is applied.
# drop_na(time.lag) removes only the rows without a lag. na.omit() on a data
# frame has no `cols` argument, so the earlier call dropped every row with an
# NA in any column.
spr <- df %>%
  filter(prompt %in% c("oma sprak", "oma's prak"), seg == "r") %>%
  group_by(recording.no) %>%
  select(subj, recording.no, prompt, block, condition, seg, group, cluster,
         duration.tt, duration.mt, t5.r = t5) %>%
  inner_join(onset.k, by = c("subj", "recording.no", "prompt", "block",
                             "condition", "group", "cluster",
                             "duration.tt", "duration.mt")) %>%
  mutate(time.lag = t3.k - t5.r) %>%
  drop_na(time.lag)

rm(onset.k)

# Per-speaker boxplots of the lag, one figure per group.
ggplot(spr[spr$group == "CTRL", ], aes(x = subj, y = time.lag, fill = condition)) +
  geom_boxplot() +
  labs(title = "CTRL: /spr/", y = "Distance to anchor (ms)")

ggplot(spr[spr$group == "PD", ], aes(x = subj, y = time.lag, fill = condition)) +
  geom_boxplot() +
  labs(title = "PD: /spr/", y = "Distance to anchor (ms)")

Merge clusters into new dataframe.

# Stack the four per-cluster tables. Their landmark columns are named
# differently (t5.p, t5.m, t5.x, t5.r; t3.t, t3.k), so the rows are combined
# by column name and the missing columns are filled with NA.
df.rightedge <- bind_rows(sm, sp, sx, spr)
# df.rightedge <- unique(df.rightedge[,c("subj", "group", "condition","cluster","mean.lag.prompt.norm")])
rm(sm, sp, sx, spr)

2.1 Plots

2.1.1 Plots over all speakers

# Dodge position reused by the boxplot layers of the violin plots below.
dodge <- position_dodge(width = 0.9)

# Right-edge lag by group and condition, pooled over all clusters.
ggplot(df.rightedge, aes(x = group, y = time.lag, fill = condition)) +
  geom_violin() +
  labs(title = "Over all clusters", y = "Distance to anchor") +
  geom_boxplot(width = .2, position = dodge)

# /sp/
ggplot(df.rightedge[df.rightedge$cluster == "sp", ],
       aes(x = group, y = time.lag, fill = condition)) +
  geom_violin() +
  labs(title = "sp", y = "Lag (ms)") +
  geom_boxplot(width = .2, position = dodge)

# /sm/
ggplot(df.rightedge[df.rightedge$cluster == "sm", ],
       aes(x = group, y = time.lag, fill = condition)) +
  geom_violin() +
  labs(title = "sm", y = "Lag (ms)") +
  geom_boxplot(width = .2, position = dodge)

# /sx/
ggplot(df.rightedge[df.rightedge$cluster == "sx", ],
       aes(x = group, y = time.lag, fill = condition)) +
  geom_violin() +
  labs(title = "sx", y = "Lag (ms)") +
  geom_boxplot(width = .2, position = dodge)

# /spr/
ggplot(df.rightedge[df.rightedge$cluster == "spr", ],
       aes(x = group, y = time.lag, fill = condition)) +
  geom_violin() +
  labs(title = "spr", y = "Lag (ms)") +
  geom_boxplot(width = .2, position = dodge)

2.1.2 Plots per speaker

/sp/

# Per-speaker boxplots of the /sp/ right-edge lag: controls first, then PD.
ggplot(
  df.rightedge[df.rightedge$group == "CTRL" &
                 df.rightedge$prompt != "oma's prak" &
                 df.rightedge$cluster == "sp", ],
  aes(x = subj, y = time.lag, fill = condition)
) +
  labs(title = "/sp/, CTRL") +
  geom_boxplot()

ggplot(
  df.rightedge[df.rightedge$group == "PD" &
                 df.rightedge$prompt != "oma's prak" &
                 df.rightedge$cluster == "sp", ],
  aes(x = subj, y = time.lag, fill = condition)
) +
  labs(title = "/sp/, PD, ", y = "Lag (ms)") +
  geom_boxplot()

/sm/

# Per-speaker boxplots of the /sm/ right-edge lag: controls first, then PD.
ggplot(
  df.rightedge[df.rightedge$group == "CTRL" &
                 df.rightedge$prompt != "oma's prak" &
                 df.rightedge$cluster == "sm", ],
  aes(x = subj, y = time.lag, fill = condition)
) +
  labs(title = "/sm/, CTRL", y = "Lag (ms)") +
  geom_boxplot()

ggplot(
  df.rightedge[df.rightedge$group == "PD" &
                 df.rightedge$prompt != "oma's prak" &
                 df.rightedge$cluster == "sm", ],
  aes(x = subj, y = time.lag, fill = condition)
) +
  labs(title = "/sm/, PD", y = "Lag (ms)") +
  geom_boxplot()

/sx/

# Per-speaker boxplots of the /sx/ right-edge lag: controls first, then PD.
ggplot(
  df.rightedge[df.rightedge$group == "CTRL" &
                 df.rightedge$prompt != "oma's prak" &
                 df.rightedge$cluster == "sx", ],
  aes(x = subj, y = time.lag, fill = condition)
) +
  labs(title = "/sx/, CTRL", y = "Lag (ms)") +
  geom_boxplot()

ggplot(
  df.rightedge[df.rightedge$group == "PD" &
                 df.rightedge$prompt != "oma's prak" &
                 df.rightedge$cluster == "sx", ],
  aes(x = subj, y = time.lag, fill = condition)
) +
  labs(title = "/sx/, PD", y = "Lag (ms)") +
  geom_boxplot()

/spr/

# Per-speaker boxplots of the /spr/ right-edge lag: controls first, then PD.
# The rows are now selected with cluster == "spr"; the earlier filter used
# "sp", so the figures titled /spr/ showed the /sp/ data.
# NOTE(review): the prompt filter also removes "oma's prak", which leaves only
# "oma sprak" in these two figures - confirm that this is intended.
ggplot(
  df.rightedge[df.rightedge$group == "CTRL" &
                 df.rightedge$prompt != "oma's prak" &
                 df.rightedge$cluster == "spr", ],
  aes(x = subj, y = time.lag, fill = condition)
) +
  labs(title = "/spr/, CTRL", y = "Lag (ms)") +
  geom_boxplot()

ggplot(
  df.rightedge[df.rightedge$group == "PD" &
                 df.rightedge$prompt != "oma's prak" &
                 df.rightedge$cluster == "spr", ],
  aes(x = subj, y = time.lag, fill = condition)
) +
  labs(title = "/spr/, PD", y = "Lag (ms)") +
  geom_boxplot()

3 Preparing data for within cluster analysis

/sp/ CC lag

# Within-cluster lag for /sp/ in "oma spat": landmark t3 of /p/ minus
# landmark t5 of /s/, per recording.

# t5 of /s/ (the second group_by() of the original pair is the one in effect).
offset.s <- df %>%
  filter(prompt == "oma spat", seg == "s") %>%
  group_by(recording.no) %>%
  select(subj, recording.no, t5.s = t5)

# t3 of /p/ joined to the /s/ row of the same speaker and recording.
# drop_na(time.lag) removes only rows without a lag; na.omit() ignores `cols`
# for data frames and dropped rows with an NA in any column.
sp.CC <- df %>%
  filter(prompt == "oma spat", seg == "p") %>%
  group_by(recording.no) %>%
  select(subj, recording.no, prompt, block, condition, group, cluster,
         duration.tt, duration.mt, t3.p = t3) %>%
  inner_join(offset.s, by = c("subj", "recording.no")) %>%
  mutate(time.lag = t3.p - t5.s) %>%
  drop_na(time.lag)

rm(offset.s)

# Per-speaker boxplots, both groups in one figure (fill = group). The title no
# longer names a single group, and `na.rm` is not an aesthetic, so it was
# removed from aes(); missing lags are already dropped above.
ggplot(sp.CC, aes(x = subj, y = time.lag, fill = group)) +
  geom_boxplot() +
  labs(title = "CC lag /sp/", y = "Lag between C's (ms)")

/sm/ CC lag

# Within-cluster lag for /sm/ in "opa Smat": landmark t3 of /m/ minus
# landmark t5 of /s/, per recording.

# t5 of /s/ (the second group_by() of the original pair is the one in effect).
offset.s <- df %>%
  filter(prompt == "opa Smat", seg == "s") %>%
  group_by(recording.no) %>%
  select(subj, recording.no, t5.s = t5)

# t3 of /m/ joined to the /s/ row of the same speaker and recording.
# drop_na(time.lag) removes only rows without a lag; na.omit() ignores `cols`
# for data frames and dropped rows with an NA in any column.
sm.CC <- df %>%
  filter(prompt == "opa Smat", seg == "m") %>%
  group_by(recording.no) %>%
  select(subj, recording.no, prompt, block, condition, group, cluster,
         duration.tt, duration.mt, t3.m = t3) %>%
  inner_join(offset.s, by = c("subj", "recording.no")) %>%
  mutate(time.lag = t3.m - t5.s) %>%
  drop_na(time.lag)

rm(offset.s)

# Per-speaker boxplots, both groups in one figure (fill = group). The title no
# longer names a single group, and `na.rm` is not an aesthetic, so it was
# removed from aes(); missing lags are already dropped above.
ggplot(sm.CC, aes(x = subj, y = time.lag, fill = group)) +
  geom_boxplot() +
  labs(title = "CC lag /sm/", y = "Lag between C's (ms)")

/sx/ CC lag

# Within-cluster lag for /sx/ in "opa schat": landmark t3 of the segment
# coded "X" minus landmark t5 of /s/, per recording.

# t5 of /s/ (the second group_by() of the original pair is the one in effect).
offset.s <- df %>%
  filter(prompt == "opa schat", seg == "s") %>%
  group_by(recording.no) %>%
  select(subj, recording.no, t5.s = t5)

# t3 of /x/ joined to the /s/ row of the same speaker and recording.
# drop_na(time.lag) removes only rows without a lag; na.omit() ignores `cols`
# for data frames and dropped rows with an NA in any column.
sx.CC <- df %>%
  filter(prompt == "opa schat", seg == "X") %>%
  group_by(recording.no) %>%
  select(subj, recording.no, prompt, block, condition, group, cluster,
         duration.tt, duration.mt, t3.x = t3) %>%
  inner_join(offset.s, by = c("subj", "recording.no")) %>%
  mutate(time.lag = t3.x - t5.s) %>%
  drop_na(time.lag)

rm(offset.s)

# Per-speaker boxplots, both groups in one figure (fill = group). The title no
# longer names a single group, and `na.rm` is not an aesthetic, so it was
# removed from aes(); missing lags are already dropped above.
ggplot(sx.CC, aes(x = subj, y = time.lag, fill = group)) +
  geom_boxplot() +
  labs(title = "CC lag /sx/", y = "Lag between C's (ms)")

/pr/ CC lag

# Within-cluster lag for /pr/ in "oma's prak": landmark t3 of /r/ minus
# landmark t5 of /p/, per recording.

# t5 of /p/ (the second group_by() of the original pair is the one in effect).
offset.p <- df %>%
  filter(prompt == "oma's prak", seg == "p") %>%
  group_by(recording.no) %>%
  select(subj, recording.no, t5.p = t5)

# t3 of /r/ joined to the /p/ row of the same speaker and recording.
# drop_na(time.lag) removes only rows without a lag; na.omit() ignores `cols`
# for data frames and dropped rows with an NA in any column.
pr.CC <- df %>%
  filter(prompt == "oma's prak", seg == "r") %>%
  group_by(recording.no) %>%
  select(subj, recording.no, prompt, block, condition, group, cluster,
         duration.tt, duration.mt, t3.r = t3) %>%
  inner_join(offset.p, by = c("subj", "recording.no")) %>%
  mutate(time.lag = t3.r - t5.p) %>%
  drop_na(time.lag)

rm(offset.p)

# Per-speaker boxplots, both groups in one figure (fill = group). The title no
# longer names a single group, and `na.rm` is not an aesthetic, so it was
# removed from aes(); missing lags are already dropped above.
ggplot(pr.CC, aes(x = subj, y = time.lag, fill = group)) +
  geom_boxplot() +
  labs(title = "CC lag /pr/", y = "Lag between C's (ms)")

/spr/ CC lag

# Within-cluster lag for /spr/ in "oma sprak": landmark t3 of /r/ minus
# landmark t5 of /s/, per recording. The values are raw landmark times; no
# normalisation is applied.

# t5 of /s/ (the second group_by() of the original pair is the one in effect).
offset.s <- df %>%
  filter(prompt == "oma sprak", seg == "s") %>%
  group_by(recording.no) %>%
  select(subj, recording.no, t5.s = t5)

# t3 of /r/ joined to the /s/ row of the same speaker and recording.
# drop_na(time.lag) removes only rows without a lag; na.omit() ignores `cols`
# for data frames and dropped rows with an NA in any column.
spr.CC <- df %>%
  filter(prompt == "oma sprak", seg == "r") %>%
  group_by(recording.no) %>%
  select(subj, recording.no, prompt, block, condition, group, cluster,
         duration.tt, duration.mt, t3.r = t3) %>%
  inner_join(offset.s, by = c("subj", "recording.no")) %>%
  mutate(time.lag = t3.r - t5.s) %>%
  drop_na(time.lag)

rm(offset.s)

# Per-speaker boxplots, both groups in one figure (fill = group). The title no
# longer names a single group, and `na.rm` is not an aesthetic, so it was
# removed from aes(); missing lags are already dropped above.
ggplot(spr.CC, aes(x = subj, y = time.lag, fill = group)) +
  geom_boxplot() +
  labs(title = "CC lag /spr/", y = "Lag between C's (ms)")

Merge.

# Stack the five CC-lag tables by column name. Their landmark columns differ
# (t3.m, t3.p, t3.x, t3.r; t5.s, t5.p), so absent columns are filled with NA.
df.CC.lag <- bind_rows(sm.CC, sp.CC, sx.CC, pr.CC, spr.CC)
rm(sm.CC, sp.CC, sx.CC, pr.CC, spr.CC)

3.1 Some plots

# CC lag by condition and group: all clusters first, then one figure per
# cluster or prompt. `dodge` is the position object defined earlier in the file.
ggplot(df.CC.lag, aes(x = condition, y = time.lag, fill = group)) +
  geom_violin() +
  labs(title = "CC lag all clusters", y = "Lag between C's") +
  geom_boxplot(width = .2, position = dodge)

ggplot(df.CC.lag[df.CC.lag$cluster == "sp", ],
       aes(x = condition, y = time.lag, fill = group)) +
  geom_violin() +
  labs(title = "CC lag /sp/", y = "Lag between C's") +
  geom_boxplot(width = .2, position = dodge)

ggplot(df.CC.lag[df.CC.lag$cluster == "sm", ],
       aes(x = condition, y = time.lag, fill = group)) +
  geom_violin() +
  labs(title = "CC lag /sm/", y = "Lag between C's") +
  geom_boxplot(width = .2, position = dodge)

ggplot(df.CC.lag[df.CC.lag$prompt == "oma's prak", ],
       aes(x = condition, y = time.lag, fill = group)) +
  geom_violin() +
  labs(title = "CC lag /pr/", y = "Lag between C's") +
  geom_boxplot(width = .2, position = dodge)

ggplot(df.CC.lag[df.CC.lag$prompt == "oma sprak", ],
       aes(x = condition, y = time.lag, fill = group)) +
  geom_violin() +
  labs(title = "CC lag /spr/", y = "Lag between C's") +
  geom_boxplot(width = .2, position = dodge)

4 Examining duration of C's in onset

# Duration of each segment row: landmark t5 minus landmark t3.
# The subtraction is already vectorised, so rowwise() is not needed; it only
# forced a per-row evaluation and left df as a row-wise data frame.
df <- df %>%
  mutate(dur = t5 - t3)

Plot durations of right C per group:

# Duration of the right-hand consonant of each cluster by group and condition.
# ylim(0, 300) drops values outside that range (see the warnings).
ggplot(df[df$cluster == "sm" & df$seg == "m", ],
       aes(x = group, y = dur, fill = condition)) +
  geom_violin() +
  labs(title = "duration /m/", y = "duration") +
  geom_boxplot(width = .2, position = dodge) +
  ylim(0, 300)
## Warning: Removed 28 rows containing non-finite values (stat_ydensity).
## Warning: Removed 28 rows containing non-finite values (stat_boxplot).

ggplot(df[df$cluster == "sp" & df$seg == "p", ],
       aes(x = group, y = dur, fill = condition)) +
  geom_violin() +
  labs(title = "duration /p/", y = "duration") +
  geom_boxplot(width = .2, position = dodge) +
  ylim(0, 300)
## Warning: Removed 16 rows containing non-finite values (stat_ydensity).
## Warning: Removed 16 rows containing non-finite values (stat_boxplot).

ggplot(df[df$cluster == "sx" & df$seg == "X", ],
       aes(x = group, y = dur, fill = condition)) +
  geom_violin() +
  labs(title = "duration /x/", y = "duration") +
  geom_boxplot(width = .2, position = dodge) +
  ylim(0, 300)
## Warning: Removed 49 rows containing non-finite values (stat_ydensity).
## Warning: Removed 49 rows containing non-finite values (stat_boxplot).

ggplot(df[df$cluster == "spr" & df$seg == "r", ],
       aes(x = group, y = dur, fill = condition)) +
  geom_violin() +
  labs(title = "duration /r/", y = "duration") +
  geom_boxplot(width = .2, position = dodge) +
  ylim(0, 300)
## Warning: Removed 89 rows containing non-finite values (stat_ydensity).
## Warning: Removed 89 rows containing non-finite values (stat_boxplot).

Plot durations of right C per subject:

# Duration of /m/ and /p/ per speaker, one figure per group, split by
# condition.
ggplot(df[df$cluster == "sm" & df$seg == "m" & df$group == "CTRL", ],
       aes(x = subj, y = dur, fill = condition)) +
  geom_violin() +
  labs(title = "duration /m/ CTRL", y = "duration") +
  geom_boxplot(width = .2, position = dodge) +
  ylim(0, 300)
## Warning: Removed 8 rows containing non-finite values (stat_ydensity).
## Warning: Removed 8 rows containing non-finite values (stat_boxplot).

ggplot(df[df$cluster == "sm" & df$seg == "m" & df$group == "PD", ],
       aes(x = subj, y = dur, fill = condition)) +
  geom_violin() +
  labs(title = "duration /m/ PD", y = "duration") +
  geom_boxplot(width = .2, position = dodge) +
  ylim(0, 300)
## Warning: Removed 20 rows containing non-finite values (stat_ydensity).
## Warning: Removed 20 rows containing non-finite values (stat_boxplot).

ggplot(df[df$cluster == "sp" & df$seg == "p" & df$group == "CTRL", ],
       aes(x = subj, y = dur, fill = condition)) +
  geom_violin() +
  labs(title = "duration /p/ CTRL", y = "duration") +
  geom_boxplot(width = .2, position = dodge) +
  ylim(0, 300)
## Warning: Removed 12 rows containing non-finite values (stat_ydensity).
## Warning: Removed 12 rows containing non-finite values (stat_boxplot).

ggplot(df[df$cluster == "sp" & df$seg == "p" & df$group == "PD", ],
       aes(x = subj, y = dur, fill = condition)) +
  geom_violin() +
  labs(title = "duration /p/ PD", y = "duration") +
  geom_boxplot(width = .2, position = dodge) +
  ylim(0, 300)
## Warning: Removed 4 rows containing non-finite values (stat_ydensity).
## Warning: Removed 4 rows containing non-finite values (stat_boxplot).

Plot duration of first C per group

# Duration of /s/ in the sm and sp prompt sets by group and condition.
ggplot(df[df$cluster == "sm" & df$seg == "s", ],
       aes(x = group, y = dur, fill = condition)) +
  geom_violin() +
  labs(title = "duration /s/ in /sm/ and /s#m/", y = "duration") +
  geom_boxplot(width = .2, position = dodge) +
  ylim(0, 300)
## Warning: Removed 29 rows containing non-finite values (stat_ydensity).
## Warning: Removed 29 rows containing non-finite values (stat_boxplot).

ggplot(df[df$cluster == "sp" & df$seg == "s", ],
       aes(x = group, y = dur, fill = condition)) +
  geom_violin() +
  labs(title = "duration /s/ in /sp/ and /s#p/", y = "duration") +
  geom_boxplot(width = .2, position = dodge) +
  ylim(0, 300)
## Warning: Removed 15 rows containing non-finite values (stat_ydensity).
## Warning: Removed 15 rows containing non-finite values (stat_boxplot).

Compare duration of C's in onset per group

# Duration of the onset consonants in the CC condition, by group, one fill
# colour per segment. The vowel and the coda /t/ rows are excluded.
# Both groups are on the x axis, so the titles no longer end in ", CTRL".
ggplot(df[df$cluster == "sm" & df$seg != "É‘" & df$seg != "t" & df$condition == "CC", ],
       aes(x = group, y = dur, fill = seg)) +
  geom_violin() +
  labs(title = "duration /s/ and /m/ in CC", y = "duration") +
  geom_boxplot(width = .2, position = dodge) +
  ylim(0, 300)
## Warning: Removed 40 rows containing non-finite values (stat_ydensity).
## Warning: Removed 40 rows containing non-finite values (stat_boxplot).

ggplot(df[df$cluster == "sp" & df$seg != "É‘" & df$seg != "t" & df$condition == "CC", ],
       aes(x = group, y = dur, fill = seg)) +
  geom_violin() +
  labs(title = "duration /s/ and /p/ in CC", y = "duration") +
  geom_boxplot(width = .2, position = dodge) +
  ylim(0, 300)
## Warning: Removed 17 rows containing non-finite values (stat_ydensity).
## Warning: Removed 17 rows containing non-finite values (stat_boxplot).

Compare duration of C's in onset per subj

# Duration of the onset consonants in the CC condition per speaker, one
# figure per group and cluster; the vowel and coda /t/ rows are excluded.
ggplot(df[df$group == "CTRL" & df$cluster == "sm" & df$seg != "É‘" & df$seg != "t" & df$condition == "CC", ],
       aes(x = subj, y = dur, fill = seg)) +
  geom_violin() +
  labs(title = "duration /s/ and/m/ in CC, CTRL", y = "duration") +
  geom_boxplot(width = .2, position = dodge) +
  ylim(0, 300)
## Warning: Removed 8 rows containing non-finite values (stat_ydensity).
## Warning: Removed 8 rows containing non-finite values (stat_boxplot).

ggplot(df[df$group == "PD" & df$cluster == "sm" & df$seg != "É‘" & df$seg != "t" & df$condition == "CC", ],
       aes(x = subj, y = dur, fill = seg)) +
  geom_violin() +
  labs(title = "duration /s/ and/m/ in CC, PD", y = "duration") +
  geom_boxplot(width = .2, position = dodge) +
  ylim(0, 300)
## Warning: Removed 32 rows containing non-finite values (stat_ydensity).
## Warning: Removed 32 rows containing non-finite values (stat_boxplot).

ggplot(df[df$group == "CTRL" & df$cluster == "sp" & df$seg != "É‘" & df$seg != "t" & df$condition == "CC", ],
       aes(x = subj, y = dur, fill = seg)) +
  geom_violin() +
  labs(title = "duration /s/ and/p/ in CC, CTRL", y = "duration") +
  geom_boxplot(width = .2, position = dodge) +
  ylim(0, 300)
## Warning: Removed 13 rows containing non-finite values (stat_ydensity).
## Warning: Removed 13 rows containing non-finite values (stat_boxplot).

ggplot(df[df$group == "PD" & df$cluster == "sp" & df$seg != "É‘" & df$seg != "t" & df$condition == "CC", ],
       aes(x = subj, y = dur, fill = seg)) +
  geom_violin() +
  labs(title = "duration /s/ and /p/ in CC, PD", y = "duration") +
  geom_boxplot(width = .2, position = dodge) +
  ylim(0, 300)
## Warning: Removed 4 rows containing non-finite values (stat_ydensity).
## Warning: Removed 4 rows containing non-finite values (stat_boxplot).

5 Examining landmarks

5.1 Derive temporal midpoint of nucleus

# Per-row mean of landmarks t3 and t5. With na.rm = TRUE a row with one
# missing landmark gets the other landmark's value instead of NA.
df <- mutate(
  rowwise(df),
  mid.nuc = mean(c(t3, t5), na.rm = TRUE)
)

5.2 Examine relation of gestural onset/offset to nucleus midpoint

# Long table with one row per segment and side:
#   GONS = mid.nuc - t1, GOFF = t7 - mid.nuc.
# gather() stacks both columns into `time.to.mid`, records the source column
# in the factor `side`, and drops missing values.
df.density <- df %>%
  rowwise() %>%
  mutate(
    GONS = mid.nuc - t1,
    GOFF = t7 - mid.nuc
  ) %>%
  gather(side, time.to.mid, GONS:GOFF, factor_key = TRUE, na.rm = TRUE)

# Density of the two distances, per segment and condition.
ggplot(df.density[df.density$condition == "C" & df.density$seg == "s", ],
       aes(x = time.to.mid, fill = side)) +
  geom_density(alpha = 0.4) +
  ggtitle("Distribution time to midpoint /s/ in C")

ggplot(df.density[df.density$condition == "CC" & df.density$seg == "s", ],
       aes(x = time.to.mid, fill = side)) +
  geom_density(alpha = 0.4) +
  ggtitle("Distribution time to midpoint /s/ in CC")

ggplot(df.density[df.density$condition == "C" & df.density$seg == "m", ],
       aes(x = time.to.mid, fill = side)) +
  geom_density(alpha = 0.4) +
  ggtitle("Distribution time to midpoint /m/ in C")

ggplot(df.density[df.density$condition == "CC" & df.density$seg == "m", ],
       aes(x = time.to.mid, fill = side)) +
  geom_density(alpha = 0.4) +
  ggtitle("Distribution time to midpoint /m/ in CC")

ggplot(df.density[df.density$condition == "C" & df.density$seg == "p", ],
       aes(x = time.to.mid, fill = side)) +
  geom_density(alpha = 0.4) +
  ggtitle("Distribution time to midpoint /p/ in C")

ggplot(df.density[df.density$condition == "CC" & df.density$seg == "p", ],
       aes(x = time.to.mid, fill = side)) +
  geom_density(alpha = 0.4) +
  ggtitle("Distribution time to midpoint /p/ in CC")

ggplot(df.density[df.density$condition == "C" & df.density$seg == "X", ],
       aes(x = time.to.mid, fill = side)) +
  geom_density(alpha = 0.4) +
  ggtitle("Distribution time to midpoint /x/ in C")

ggplot(df.density[df.density$condition == "CC" & df.density$seg == "X", ],
       aes(x = time.to.mid, fill = side)) +
  geom_density(alpha = 0.4) +
  ggtitle("Distribution time to midpoint /x/ in CC")

6 Preparing data for C-center analysis based on nucleus on and offset

6.1 Calculate lags for /sp/

# mid.nuc of /p/ in both /sp/ prompts.
mid.nuc.p <- df %>%
  filter(prompt %in% c("oma spat", "oma's pad"), seg == "p") %>%
  group_by(recording.no) %>%
  select(subj, prompt, recording.no, mid.nuc.p = mid.nuc)

# mid.nuc of /s/; only "oma spat" is queried for /s/ here.
mid.nuc.s <- df %>%
  filter(prompt == "oma spat", seg == "s") %>%
  group_by(recording.no) %>%
  select(subj, prompt, recording.no, mid.nuc.s = mid.nuc)

# C-center per recording: the /p/ midpoint for "oma's pad", and the mean of
# the /s/ and /p/ midpoints for "oma spat" (NA when either one is missing).
sp.c.center <- mid.nuc.p %>%
  left_join(mid.nuc.s, by = c("subj", "prompt", "recording.no")) %>%
  rowwise() %>%
  mutate(
    c.center = case_when(
      prompt == "oma's pad" ~ mid.nuc.p,
      prompt == "oma spat" ~ mean(c(mid.nuc.s, mid.nuc.p))
    )
  ) %>%
  select(subj, recording.no, c.center)

rm(mid.nuc.p, mid.nuc.s)

Get rightedge /sp/

# Right edge of /sp/: landmark t5 of /p/ in both prompts, per recording.
sp.rightedge <- df %>%
  filter(prompt %in% c("oma spat", "oma's pad"), seg == "p") %>%
  group_by(recording.no) %>%
  select(subj, recording.no, rightedge = t5)

Get leftedge /sp/

# Left edge in the cluster prompt "oma spat": landmark t4 of /s/.
sp.leftedge.CC <- df %>%
  filter(prompt == "oma spat", seg == "s") %>%
  group_by(recording.no) %>%
  select(subj, recording.no, leftedge = t4)

Get leftedge /p/

# Left edge in the singleton prompt "oma's pad": landmark t4 of /p/.
sp.leftedge.C <- df %>%
  filter(prompt == "oma's pad", seg == "p") %>%
  group_by(recording.no) %>%
  select(subj, recording.no, leftedge = t4)

Merge

# Stack the cluster and singleton left-edge tables (same three columns).
sp.leftedge <- bind_rows(sp.leftedge.CC, sp.leftedge.C)

Get anchor /t/

# Anchor: landmark t3 of the coda /t/ in both /sp/ prompts, per recording.
sp.anchor <- df %>%
  filter(prompt %in% c("oma spat", "oma's pad"), seg == "t") %>%
  group_by(recording.no) %>%
  select(subj, recording.no, anchor = t3)

Merge and remove obsolete data frames

# Attach c.center, rightedge, leftedge and anchor to every /sp/ row of df,
# matching on speaker and recording number.
df.C.center.sp <- df %>%
  filter(prompt %in% c("oma spat", "oma's pad")) %>%
  left_join(sp.c.center, by = c("subj", "recording.no")) %>%
  left_join(sp.rightedge, by = c("subj", "recording.no")) %>%
  left_join(sp.leftedge, by = c("subj", "recording.no")) %>%
  left_join(sp.anchor, by = c("subj", "recording.no"))

# The left-edge tables are not removed here.
rm(sp.c.center, sp.rightedge, sp.anchor)

6.2 Calculate lags for /sm/

# Landmark t4 of /m/ in both /sm/ prompts, stored as mid.nuc.m.
# NOTE(review): the /sp/ section reads the derived `mid.nuc` column at this
# step, whereas this section reads `t4` - confirm which one is intended.
mid.nuc.m <- df %>%
  filter(prompt %in% c("opa Smat", "opa's mat"), seg == "m") %>%
  group_by(recording.no) %>%
  select(subj, prompt, recording.no, mid.nuc.m = t4)

# Landmark t4 of /s/; only "opa Smat" is queried for /s/ here.
mid.nuc.s <- df %>%
  filter(prompt == "opa Smat", seg == "s") %>%
  group_by(recording.no) %>%
  select(subj, prompt, recording.no, mid.nuc.s = t4)

# C-center per recording: the /m/ value for "opa's mat", and the mean of the
# /s/ and /m/ values for "opa Smat" (NA when either one is missing).
sm.c.center <- mid.nuc.m %>%
  left_join(mid.nuc.s, by = c("subj", "prompt", "recording.no")) %>%
  rowwise() %>%
  mutate(
    c.center = case_when(
      prompt == "opa's mat" ~ mid.nuc.m,
      prompt == "opa Smat" ~ mean(c(mid.nuc.s, mid.nuc.m))
    )
  ) %>%
  select(subj, recording.no, c.center)

rm(mid.nuc.m, mid.nuc.s)

Get rightedge /sm/

# Right edge of /sm/: landmark t5 of /m/ in both prompts, per recording.
sm.rightedge <- df %>%
  filter(prompt %in% c("opa Smat", "opa's mat"), seg == "m") %>%
  group_by(recording.no) %>%
  select(subj, recording.no, rightedge = t5)

Get leftedge /sm/

# Left edge in the cluster prompt "opa Smat": landmark t4 of /s/.
sm.leftedge.CC <- df %>%
  filter(prompt == "opa Smat", seg == "s") %>%
  group_by(recording.no) %>%
  select(subj, recording.no, leftedge = t4)

Get leftedge /m/

# collect onset times /m/ (column t4) for the singleton prompt "opa's mat"
# No grouping is needed for a row filter plus column selection, so the table
# is returned ungrouped.
sm.leftedge.C <- df %>%
  ungroup() %>%
  filter(prompt == "opa's mat", seg == "m") %>%
  select(subj, recording.no, leftedge = t4)

Merge

# Stack the cluster and singleton left edges into one lookup table.
# bind_rows() on explicitly ungrouped inputs is used instead of base rbind(),
# which is not aware of dplyr grouping metadata.
sm.leftedge <- bind_rows(ungroup(sm.leftedge.CC), ungroup(sm.leftedge.C))

Get velocity based rightedge /sm/

# collect velocity-based offset times /m/ (column t6) for both /sm/ prompts
# No grouping is needed for a row filter plus column selection, so the table
# is returned ungrouped.
sm.rightedge.vel <- df %>%
  ungroup() %>%
  filter(prompt %in% c("opa Smat", "opa's mat"), seg == "m") %>%
  select(subj, recording.no, rightedge.vel = t6)

Get anchor /t/

# collect onset times coda /t/ (column t3) for both /sm/ prompts
# No grouping is needed for a row filter plus column selection, so the table
# is returned ungrouped.
sm.anchor <- df %>%
  ungroup() %>%
  filter(prompt %in% c("opa Smat", "opa's mat"), seg == "t") %>%
  select(subj, recording.no, anchor = t3)

Merge and remove obsolete data frames

# Attach the per-recording landmarks (c.center, rightedge, leftedge, anchor)
# to every row of the /sm/ recordings, matching on subject and recording number.
df.C.center.sm <- df %>%
  filter(prompt %in% c("opa Smat", "opa's mat")) %>%
  left_join(sm.c.center, by = c("subj", "recording.no")) %>%
  left_join(sm.rightedge, by = c("subj", "recording.no")) %>%
  left_join(sm.leftedge, by = c("subj", "recording.no")) %>%
  left_join(sm.anchor, by = c("subj", "recording.no"))

# The lookup tables are obsolete once merged; the left-edge tables are removed
# as well so they do not linger in the workspace.
rm(sm.c.center, sm.rightedge, sm.leftedge, sm.leftedge.CC, sm.leftedge.C, sm.anchor)

6.3 Calculate lags for /sx/

# collect mid.nuc time (column t4) of onset /x/ (coded "X") for both /sx/ prompts
# No grouping is needed: filter() and select() work on rows and columns only,
# so the table is returned ungrouped.
mid.nuc.x <- df %>%
  ungroup() %>%
  filter(prompt %in% c("opa schat", "opa's gat"), seg == "X") %>%
  select(subj, prompt, recording.no, mid.nuc.x = t4)

# collect mid.nuc time (column t4) of onset /s/; only "opa schat" is filtered here
mid.nuc.s <- df %>%
  ungroup() %>%
  filter(prompt == "opa schat", seg == "s") %>%
  select(subj, prompt, recording.no, mid.nuc.s = t4)

# calculate C-center
# "opa's gat": the /x/ value itself; "opa schat": the mean of the /s/ and /x/
# values. The arithmetic is vectorised, so no rowwise() pass is required.
sx.c.center <- mid.nuc.x %>%
  left_join(mid.nuc.s, by = c("subj", "prompt", "recording.no")) %>%
  mutate(c.center = case_when(
    prompt == "opa's gat" ~ mid.nuc.x,
    prompt == "opa schat" ~ (mid.nuc.s + mid.nuc.x) / 2
  )) %>%
  select(subj, recording.no, c.center)

rm(mid.nuc.x, mid.nuc.s)

Get rightedge /sx/

# collect offset times /x/ (column t5, segment coded "X") for both /sx/ prompts
# No grouping is needed for a row filter plus column selection, so the table
# is returned ungrouped.
sx.rightedge <- df %>%
  ungroup() %>%
  filter(prompt %in% c("opa schat", "opa's gat"), seg == "X") %>%
  select(subj, recording.no, rightedge = t5)

Get leftedge /sx/

# collect onset times /s/ (column t4) for the cluster prompt "opa schat"
# No grouping is needed for a row filter plus column selection, so the table
# is returned ungrouped.
sx.leftedge.CC <- df %>%
  ungroup() %>%
  filter(prompt == "opa schat", seg == "s") %>%
  select(subj, recording.no, leftedge = t4)

Get leftedge /x/

# collect onset times /x/ (column t4, segment coded "X") for the singleton
# prompt "opa's gat"
# No grouping is needed for a row filter plus column selection, so the table
# is returned ungrouped.
sx.leftedge.C <- df %>%
  ungroup() %>%
  filter(prompt == "opa's gat", seg == "X") %>%
  select(subj, recording.no, leftedge = t4)

Merge

# Stack the cluster and singleton left edges into one lookup table.
# bind_rows() on explicitly ungrouped inputs is used instead of base rbind(),
# which is not aware of dplyr grouping metadata.
sx.leftedge <- bind_rows(ungroup(sx.leftedge.CC), ungroup(sx.leftedge.C))

Get anchor /t/

# collect onset times coda /t/ (column t3) for both /sx/ prompts
# No grouping is needed for a row filter plus column selection, so the table
# is returned ungrouped.
sx.anchor <- df %>%
  ungroup() %>%
  filter(prompt %in% c("opa schat", "opa's gat"), seg == "t") %>%
  select(subj, recording.no, anchor = t3)

Merge and remove obsolete data frames

# Attach the per-recording landmarks (c.center, rightedge, leftedge, anchor)
# to every row of the /sx/ recordings, matching on subject and recording number.
df.C.center.sx <- df %>%
  filter(prompt %in% c("opa schat", "opa's gat")) %>%
  left_join(sx.c.center, by = c("subj", "recording.no")) %>%
  left_join(sx.rightedge, by = c("subj", "recording.no")) %>%
  left_join(sx.leftedge, by = c("subj", "recording.no")) %>%
  left_join(sx.anchor, by = c("subj", "recording.no"))

# The lookup tables are obsolete once merged; the left-edge tables are removed
# as well so they do not linger in the workspace.
rm(sx.c.center, sx.rightedge, sx.leftedge, sx.leftedge.CC, sx.leftedge.C, sx.anchor)

6.4 Calculate lags for /spr/

# collect mid.nuc time (column t4) of onset /r/ for both /spr/ prompts
# No grouping is needed: filter() and select() work on rows and columns only,
# so the table is returned ungrouped.
mid.nuc.r <- df %>%
  ungroup() %>%
  filter(prompt %in% c("oma sprak", "oma's prak"), seg == "r") %>%
  select(subj, prompt, recording.no, mid.nuc.r = t4)

# collect mid.nuc time (column t4) of onset /p/; only "oma sprak" is filtered here
mid.nuc.p <- df %>%
  ungroup() %>%
  filter(prompt == "oma sprak", seg == "p") %>%
  select(subj, prompt, recording.no, mid.nuc.p = t4)

# collect mid.nuc time (column t4) of onset /s/; only "oma sprak" is filtered here
mid.nuc.s <- df %>%
  ungroup() %>%
  filter(prompt == "oma sprak", seg == "s") %>%
  select(subj, prompt, recording.no, mid.nuc.s = t4)

# calculate C-center
# "oma's prak": the /r/ value itself; "oma sprak": the mean of the /s/, /p/
# and /r/ values. The arithmetic is vectorised, so no rowwise() pass is required.
# NOTE(review): for "oma's prak" only /r/ is used although the left edge of
# that prompt is taken from /p/ - confirm that /p/ should not enter this mean.
spr.c.center <- mid.nuc.r %>%
  left_join(mid.nuc.s, by = c("subj", "prompt", "recording.no")) %>%
  left_join(mid.nuc.p, by = c("subj", "prompt", "recording.no")) %>%
  mutate(c.center = case_when(
    prompt == "oma's prak" ~ mid.nuc.r,
    prompt == "oma sprak" ~ (mid.nuc.s + mid.nuc.p + mid.nuc.r) / 3
  )) %>%
  select(subj, recording.no, c.center)

rm(mid.nuc.r, mid.nuc.s, mid.nuc.p)

Get rightedge /spr/

# collect offset times /r/ (column t5) for both /spr/ prompts
# No grouping is needed for a row filter plus column selection, so the table
# is returned ungrouped.
spr.rightedge <- df %>%
  ungroup() %>%
  filter(prompt %in% c("oma sprak", "oma's prak"), seg == "r") %>%
  select(subj, recording.no, rightedge = t5)

Get leftedge /spr/

# collect onset times /s/ (column t4) for the three-consonant prompt "oma sprak"
# No grouping is needed for a row filter plus column selection, so the table
# is returned ungrouped.
spr.leftedge.CCC <- df %>%
  ungroup() %>%
  filter(prompt == "oma sprak", seg == "s") %>%
  select(subj, recording.no, leftedge = t4)

Get leftedge /pr/

# collect onset times /p/ (column t4) for the prompt "oma's prak"
# No grouping is needed for a row filter plus column selection, so the table
# is returned ungrouped.
spr.leftedge.CC <- df %>%
  ungroup() %>%
  filter(prompt == "oma's prak", seg == "p") %>%
  select(subj, recording.no, leftedge = t4)

Merge

# Stack the /spr/ and /pr/ left edges into one lookup table.
# bind_rows() on explicitly ungrouped inputs is used instead of base rbind(),
# which is not aware of dplyr grouping metadata.
spr.leftedge <- bind_rows(ungroup(spr.leftedge.CCC), ungroup(spr.leftedge.CC))

Get anchor /k/

# collect onset times coda /k/ (column t3) for both /spr/ prompts
# No grouping is needed for a row filter plus column selection, so the table
# is returned ungrouped.
spr.anchor <- df %>%
  ungroup() %>%
  filter(prompt %in% c("oma sprak", "oma's prak"), seg == "k") %>%
  select(subj, recording.no, anchor = t3)

Merge and remove obsolete data frames

# Attach the per-recording landmarks (c.center, rightedge, leftedge, anchor)
# to every row of the /spr/ recordings, matching on subject and recording number.
df.C.center.spr <- df %>%
  filter(prompt %in% c("oma sprak", "oma's prak")) %>%
  left_join(spr.c.center, by = c("subj", "recording.no")) %>%
  left_join(spr.rightedge, by = c("subj", "recording.no")) %>%
  left_join(spr.leftedge, by = c("subj", "recording.no")) %>%
  left_join(spr.anchor, by = c("subj", "recording.no"))

# The lookup tables are obsolete once merged; the left-edge tables are removed
# as well so they do not linger in the workspace.
rm(spr.c.center, spr.rightedge, spr.leftedge, spr.leftedge.CCC, spr.leftedge.CC, spr.anchor)

6.5 Merge C-center data frames

# Stack the four per-cluster tables into one data frame and drop the parts.
df.C.center <- rbind(df.C.center.sp, df.C.center.sm, df.C.center.sx, df.C.center.spr)
rm(df.C.center.sp, df.C.center.sm, df.C.center.sx, df.C.center.spr)

# Each lag is the anchor time minus the corresponding landmark time.
# Subtraction is already vectorised over rows, so rowwise() is not used: it
# would only slow the computation and leave the result row-grouped.
df.C.center <- df.C.center %>%
  ungroup() %>%
  mutate(
    c.center.lag = anchor - c.center,
    rightedge.lag = anchor - rightedge,
    leftedge.lag = anchor - leftedge
  )

6.6 Distribution

# Per-subject totals: xtabs() sums c.center.lag within each level of subj.
xtabs(
  c.center.lag ~ subj,
  data = df.C.center
)
## subj
##   CTRL01   CTRL02   CTRL03   CTRL04   CTRL05   CTRL06   CTRL07   CTRL08 
## 69691.58 67511.67 65732.57 38830.00 61136.88 76342.08 59733.33 64418.75 
##   CTRL09   CTRL10   CTRL11   CTRL12   CTRL13   CTRL14   CTRL15   CTRL16 
## 52767.08 64679.17 89459.38 64703.33 47520.00 65417.50 61971.67 39735.83 
##   CTRL17   CTRL18   CTRL19   CTRL20   CTRL21   CTRL23   CTRL24   CTRL25 
## 34305.00 56720.42 24748.33 44123.12 67012.50 54380.83 44327.50 55505.83 
##     PD01     PD02     PD03     PD04     PD05     PD06     PD07     PD08 
## 45773.33 55804.17 46697.50 38727.50 59289.57 62615.42 54078.33 52945.83 
##     PD09     PD10     PD11     PD12     PD13     PD14     PD15     PD16 
## 61775.21 59651.67 53663.70 50672.50 67233.33 57992.50 59865.83 52280.00 
##     PD17     PD18     PD19     PD21     PD22     PD23     PD25          
## 60944.58 61070.00  9412.50 52150.00 48160.62 29877.50 37833.33     0.00

6.7 Some plots

6.7.1 Per group

/sm/

# /sm/ c-center lag by group, filled by condition: violins with boxplots placed via `dodge`.
# ylim() restricts the y scale to 80-350; values outside that range are dropped.
ggplot(
  df.C.center[df.C.center$cluster == 'sm', ],
  aes(x = group, y = c.center.lag, fill = condition)
) +
  geom_violin() +
  geom_boxplot(width = .2, position = dodge) +
  labs(title = "/sm/, c-center", y = "Lag") +
  ylim(80, 350)
## Warning: Removed 191 rows containing non-finite values (stat_ydensity).
## Warning: Removed 191 rows containing non-finite values (stat_boxplot).

# /sm/ rightedge lag by group, filled by condition: violins with boxplots placed via `dodge`.
# ylim() restricts the y scale to 80-350; values outside that range are dropped.
ggplot(
  df.C.center[df.C.center$cluster == 'sm', ],
  aes(x = group, y = rightedge.lag, fill = condition)
) +
  geom_violin() +
  geom_boxplot(width = .2, position = dodge) +
  labs(title = "/sm/, rightedge", y = "Lag") +
  ylim(80, 350)
## Warning: Removed 158 rows containing non-finite values (stat_ydensity).
## Warning: Removed 158 rows containing non-finite values (stat_boxplot).

# /sm/ leftedge lag by group, filled by condition: violins with boxplots placed via `dodge`.
# ylim() restricts the y scale to 80-350; values outside that range are dropped.
ggplot(
  df.C.center[df.C.center$cluster == 'sm', ],
  aes(x = group, y = leftedge.lag, fill = condition)
) +
  geom_violin() +
  geom_boxplot(width = .2, position = dodge) +
  labs(title = "/sm/, leftedge", y = "Lag") +
  ylim(80, 350)
## Warning: Removed 278 rows containing non-finite values (stat_ydensity).
## Warning: Removed 278 rows containing non-finite values (stat_boxplot).

/sp/

# /sp/ c-center lag by group, filled by condition: violins with boxplots placed via `dodge`.
# ylim() restricts the y scale to 80-350; values outside that range are dropped.
ggplot(
  df.C.center[df.C.center$cluster == 'sp', ],
  aes(x = group, y = c.center.lag, fill = condition)
) +
  geom_violin() +
  geom_boxplot(width = .2, position = dodge) +
  labs(title = "/sp/, c-center", y = "Lag") +
  ylim(80, 350)
## Warning: Removed 158 rows containing non-finite values (stat_ydensity).
## Warning: Removed 158 rows containing non-finite values (stat_boxplot).

# /sp/ rightedge lag by group, filled by condition: violins with boxplots placed via `dodge`.
# ylim() restricts the y scale to 80-350; values outside that range are dropped.
ggplot(
  df.C.center[df.C.center$cluster == 'sp', ],
  aes(x = group, y = rightedge.lag, fill = condition)
) +
  geom_violin() +
  geom_boxplot(width = .2, position = dodge) +
  labs(title = "/sp/, rightedge", y = "lag") +
  ylim(80, 350)
## Warning: Removed 101 rows containing non-finite values (stat_ydensity).
## Warning: Removed 101 rows containing non-finite values (stat_boxplot).

# /sp/ leftedge lag by group, filled by condition: violins with boxplots placed via `dodge`.
# ylim() restricts the y scale to 80-350; values outside that range are dropped.
ggplot(
  df.C.center[df.C.center$cluster == 'sp', ],
  aes(x = group, y = leftedge.lag, fill = condition)
) +
  geom_violin() +
  geom_boxplot(width = .2, position = dodge) +
  labs(title = "/sp/, leftedge", y = "lag") +
  ylim(80, 350)
## Warning: Removed 230 rows containing non-finite values (stat_ydensity).
## Warning: Removed 230 rows containing non-finite values (stat_boxplot).

/sx/

# /sx/ c-center lag by group, filled by condition: violins with boxplots placed via `dodge`.
# ylim() restricts the y scale to 80-350; values outside that range are dropped.
ggplot(
  df.C.center[df.C.center$cluster == 'sx', ],
  aes(x = group, y = c.center.lag, fill = condition)
) +
  geom_violin() +
  geom_boxplot(width = .2, position = dodge) +
  labs(title = "/sx/, c-center", y = "lag") +
  ylim(80, 350)
## Warning: Removed 575 rows containing non-finite values (stat_ydensity).
## Warning: Removed 575 rows containing non-finite values (stat_boxplot).

# /sx/ rightedge lag by group, filled by condition: violins with boxplots placed via `dodge`.
# ylim() restricts the y scale to 80-350; values outside that range are dropped.
ggplot(
  df.C.center[df.C.center$cluster == 'sx', ],
  aes(x = group, y = rightedge.lag, fill = condition)
) +
  geom_violin() +
  geom_boxplot(width = .2, position = dodge) +
  labs(title = "/sx/, rightedge", y = "lag") +
  ylim(80, 350)
## Warning: Removed 531 rows containing non-finite values (stat_ydensity).
## Warning: Removed 531 rows containing non-finite values (stat_boxplot).

# /sx/ leftedge lag by group, filled by condition: violins with boxplots placed via `dodge`.
# ylim() restricts the y scale to 80-350; values outside that range are dropped.
ggplot(
  df.C.center[df.C.center$cluster == 'sx', ],
  aes(x = group, y = leftedge.lag, fill = condition)
) +
  geom_violin() +
  geom_boxplot(width = .2, position = dodge) +
  labs(title = "/sx/, leftedge", y = "lag") +
  ylim(80, 350)
## Warning: Removed 463 rows containing non-finite values (stat_ydensity).
## Warning: Removed 463 rows containing non-finite values (stat_boxplot).

/spr/

# /spr/ c-center lag by group, filled by condition: violins with boxplots placed via `dodge`.
# ylim() restricts the y scale to 80-350; values outside that range are dropped.
ggplot(
  df.C.center[df.C.center$cluster == 'spr', ],
  aes(x = group, y = c.center.lag, fill = condition)
) +
  geom_violin() +
  geom_boxplot(width = .2, position = dodge) +
  labs(title = "/spr/, c-center", y = "lag") +
  ylim(80, 350)
## Warning: Removed 1138 rows containing non-finite values (stat_ydensity).
## Warning: Removed 1138 rows containing non-finite values (stat_boxplot).

# /spr/ rightedge lag by group, filled by condition: violins with boxplots placed via `dodge`.
# ylim() restricts the y scale to 80-350; values outside that range are dropped.
ggplot(
  df.C.center[df.C.center$cluster == 'spr', ],
  aes(x = group, y = rightedge.lag, fill = condition)
) +
  geom_violin() +
  geom_boxplot(width = .2, position = dodge) +
  labs(title = "/spr/, rightedge", y = "lag") +
  ylim(80, 350)
## Warning: Removed 1415 rows containing non-finite values (stat_ydensity).
## Warning: Removed 1415 rows containing non-finite values (stat_boxplot).

# /spr/ leftedge lag by group, filled by condition: violins with boxplots placed via `dodge`.
# ylim() restricts the y scale to 80-350; values outside that range are dropped.
ggplot(
  df.C.center[df.C.center$cluster == 'spr', ],
  aes(x = group, y = leftedge.lag, fill = condition)
) +
  geom_violin() +
  geom_boxplot(width = .2, position = dodge) +
  labs(title = "/spr/, leftedge", y = "lag") +
  ylim(80, 350)
## Warning: Removed 924 rows containing non-finite values (stat_ydensity).
## Warning: Removed 924 rows containing non-finite values (stat_boxplot).

6.7.2 Per individual

/sm/

# /sm/ c-center lag per CTRL subject, filled by condition (boxplots).
# ylim() restricts the y scale to 80-350; values outside that range are dropped.
ggplot(
  df.C.center[df.C.center$cluster == 'sm' & df.C.center$group == 'CTRL', ],
  aes(x = subj, y = c.center.lag, fill = condition)
) +
  geom_boxplot(width = .5) +
  labs(title = "/sm/, c-center, CTRL", y = "lag") +
  ylim(80, 350)
## Warning: Removed 59 rows containing non-finite values (stat_boxplot).

# /sm/ rightedge lag per CTRL subject, filled by condition (boxplots).
# ylim() restricts the y scale to 80-350; values outside that range are dropped.
ggplot(
  df.C.center[df.C.center$cluster == 'sm' & df.C.center$group == 'CTRL', ],
  aes(x = subj, y = rightedge.lag, fill = condition)
) +
  geom_boxplot(width = .5) +
  labs(title = "/sm/, rightedge, CTRL", y = "lag") +
  ylim(80, 350)
## Warning: Removed 50 rows containing non-finite values (stat_boxplot).

# /sm/ c-center lag per PD subject, filled by condition (boxplots).
# ylim() restricts the y scale to 80-350; values outside that range are dropped.
ggplot(
  df.C.center[df.C.center$cluster == 'sm' & df.C.center$group == 'PD', ],
  aes(x = subj, y = c.center.lag, fill = condition)
) +
  geom_boxplot(width = .5) +
  labs(title = "/sm/, c-center, PD", y = "lag") +
  ylim(80, 350)
## Warning: Removed 132 rows containing non-finite values (stat_boxplot).

# /sm/ rightedge lag per PD subject, filled by condition (boxplots).
# ylim() restricts the y scale to 80-350; values outside that range are dropped.
ggplot(
  df.C.center[df.C.center$cluster == 'sm' & df.C.center$group == 'PD', ],
  aes(x = subj, y = rightedge.lag, fill = condition)
) +
  geom_boxplot(width = .5) +
  labs(title = "/sm/, rightedge, PD", y = "lag") +
  ylim(80, 350)
## Warning: Removed 108 rows containing non-finite values (stat_boxplot).

/sp/

# /sp/ c-center lag per CTRL subject, filled by condition (boxplots).
# ylim() restricts the y scale to 80-350; values outside that range are dropped.
ggplot(
  df.C.center[df.C.center$cluster == 'sp' & df.C.center$group == 'CTRL', ],
  aes(x = subj, y = c.center.lag, fill = condition)
) +
  geom_boxplot(width = .5) +
  labs(title = "/sp/, c-center, CTRL", y = "lag") +
  ylim(80, 350)
## Warning: Removed 88 rows containing non-finite values (stat_boxplot).

# /sp/ rightedge lag per CTRL subject, filled by condition (boxplots).
# ylim() restricts the y scale to 80-350; values outside that range are dropped.
ggplot(
  df.C.center[df.C.center$cluster == 'sp' & df.C.center$group == 'CTRL', ],
  aes(x = subj, y = rightedge.lag, fill = condition)
) +
  geom_boxplot(width = .5) +
  labs(title = "/sp/, rightedge, CTRL", y = "lag") +
  ylim(80, 350)
## Warning: Removed 59 rows containing non-finite values (stat_boxplot).

# /sp/ c-center lag per PD subject, filled by condition (boxplots).
# ylim() restricts the y scale to 80-350; values outside that range are dropped.
ggplot(
  df.C.center[df.C.center$cluster == 'sp' & df.C.center$group == 'PD', ],
  aes(x = subj, y = c.center.lag, fill = condition)
) +
  geom_boxplot(width = .5) +
  labs(title = "/sp/, c-center, PD", y = "lag") +
  ylim(80, 350)
## Warning: Removed 70 rows containing non-finite values (stat_boxplot).

# /sp/ rightedge lag per PD subject, filled by condition (boxplots).
# ylim() restricts the y scale to 80-350; values outside that range are dropped.
ggplot(
  df.C.center[df.C.center$cluster == 'sp' & df.C.center$group == 'PD', ],
  aes(x = subj, y = rightedge.lag, fill = condition)
) +
  geom_boxplot(width = .5) +
  labs(title = "/sp/, rightedge, PD", y = "lag") +
  ylim(80, 350)
## Warning: Removed 42 rows containing non-finite values (stat_boxplot).

/sx/

# /sx/ c-center lag per CTRL subject, filled by condition (boxplots).
# ylim() restricts the y scale to 80-350; values outside that range are dropped.
ggplot(
  df.C.center[df.C.center$cluster == 'sx' & df.C.center$group == 'CTRL', ],
  aes(x = subj, y = c.center.lag, fill = condition)
) +
  geom_boxplot(width = .5) +
  labs(title = "/sx/, c-center, CTRL", y = "lag") +
  ylim(80, 350)
## Warning: Removed 297 rows containing non-finite values (stat_boxplot).

# /sx/ rightedge lag per CTRL subject, filled by condition (boxplots).
# ylim() restricts the y scale to 80-350; values outside that range are dropped.
ggplot(
  df.C.center[df.C.center$cluster == 'sx' & df.C.center$group == 'CTRL', ],
  aes(x = subj, y = rightedge.lag, fill = condition)
) +
  geom_boxplot(width = .5) +
  labs(title = "/sx/, rightedge, CTRL", y = "lag") +
  ylim(80, 350)
## Warning: Removed 278 rows containing non-finite values (stat_boxplot).

# /sx/ c-center lag per PD subject, filled by condition (boxplots).
# ylim() restricts the y scale to 80-350; values outside that range are dropped.
ggplot(
  df.C.center[df.C.center$cluster == 'sx' & df.C.center$group == 'PD', ],
  aes(x = subj, y = c.center.lag, fill = condition)
) +
  geom_boxplot(width = .5) +
  labs(title = "/sx/, c-center, PD", y = "lag") +
  ylim(80, 350)
## Warning: Removed 278 rows containing non-finite values (stat_boxplot).

# /sx/ rightedge lag per PD subject, filled by condition (boxplots).
# ylim() restricts the y scale to 80-350; values outside that range are dropped.
ggplot(
  df.C.center[df.C.center$cluster == 'sx' & df.C.center$group == 'PD', ],
  aes(x = subj, y = rightedge.lag, fill = condition)
) +
  geom_boxplot(width = .5) +
  labs(title = "/sx/, rightedge, PD", y = "lag") +
  ylim(80, 350)
## Warning: Removed 253 rows containing non-finite values (stat_boxplot).

/spr/

# /spr/ c-center lag per CTRL subject, filled by condition (boxplots).
# ylim() restricts the y scale to 80-350; values outside that range are dropped.
ggplot(
  df.C.center[df.C.center$cluster == 'spr' & df.C.center$group == 'CTRL', ],
  aes(x = subj, y = c.center.lag, fill = condition)
) +
  geom_boxplot(width = .5) +
  labs(title = "/spr/, c-center, CTRL", y = "lag") +
  ylim(80, 350)
## Warning: Removed 546 rows containing non-finite values (stat_boxplot).

# /spr/ rightedge lag per CTRL subject, filled by condition (boxplots).
# ylim() restricts the y scale to 80-350; values outside that range are dropped.
ggplot(
  df.C.center[df.C.center$cluster == 'spr' & df.C.center$group == 'CTRL', ],
  aes(x = subj, y = rightedge.lag, fill = condition)
) +
  geom_boxplot(width = .5) +
  labs(title = "/spr/, rightedge, CTRL", y = "lag") +
  ylim(80, 350)
## Warning: Removed 702 rows containing non-finite values (stat_boxplot).

# /spr/ c-center lag per PD subject, filled by condition (boxplots).
# ylim() restricts the y scale to 80-350; values outside that range are dropped.
ggplot(
  df.C.center[df.C.center$cluster == 'spr' & df.C.center$group == 'PD', ],
  aes(x = subj, y = c.center.lag, fill = condition)
) +
  geom_boxplot(width = .5) +
  labs(title = "/spr/, c-center, PD", y = "lag") +
  ylim(80, 350)
## Warning: Removed 592 rows containing non-finite values (stat_boxplot).

# /spr/ rightedge lag per PD subject, filled by condition (boxplots).
# ylim() restricts the y scale to 80-350; values outside that range are dropped.
ggplot(
  df.C.center[df.C.center$cluster == 'spr' & df.C.center$group == 'PD', ],
  aes(x = subj, y = rightedge.lag, fill = condition)
) +
  geom_boxplot(width = .5) +
  labs(title = "/spr/, rightedge, PD", y = "lag") +
  ylim(80, 350)
## Warning: Removed 713 rows containing non-finite values (stat_boxplot).

6.8 Some plots

6.8.1 Per group

/sm/

# /sm/ c-center lag by group, filled by condition: violins with boxplots placed via `dodge`.
# ylim() restricts the y scale to 80-350; values outside that range are dropped.
ggplot(
  df.C.center[df.C.center$cluster == 'sm', ],
  aes(x = group, y = c.center.lag, fill = condition)
) +
  geom_violin() +
  geom_boxplot(width = .2, position = dodge) +
  labs(title = "/sm/, c-center", y = "Lag") +
  ylim(80, 350)
## Warning: Removed 191 rows containing non-finite values (stat_ydensity).
## Warning: Removed 191 rows containing non-finite values (stat_boxplot).

# /sm/ rightedge lag by group, filled by condition: violins with boxplots placed via `dodge`.
# ylim() restricts the y scale to 80-350; values outside that range are dropped.
ggplot(
  df.C.center[df.C.center$cluster == 'sm', ],
  aes(x = group, y = rightedge.lag, fill = condition)
) +
  geom_violin() +
  geom_boxplot(width = .2, position = dodge) +
  labs(title = "/sm/, rightedge", y = "Lag") +
  ylim(80, 350)
## Warning: Removed 158 rows containing non-finite values (stat_ydensity).
## Warning: Removed 158 rows containing non-finite values (stat_boxplot).

# /sm/ leftedge lag by group, filled by condition: violins with boxplots placed via `dodge`.
# ylim() restricts the y scale to 80-350; values outside that range are dropped.
ggplot(
  df.C.center[df.C.center$cluster == 'sm', ],
  aes(x = group, y = leftedge.lag, fill = condition)
) +
  geom_violin() +
  geom_boxplot(width = .2, position = dodge) +
  labs(title = "/sm/, leftedge", y = "Lag") +
  ylim(80, 350)
## Warning: Removed 278 rows containing non-finite values (stat_ydensity).
## Warning: Removed 278 rows containing non-finite values (stat_boxplot).

/sp/

# /sp/ c-center lag by group, filled by condition: violins with boxplots placed via `dodge`.
# ylim() restricts the y scale to 80-350; values outside that range are dropped.
ggplot(
  df.C.center[df.C.center$cluster == 'sp', ],
  aes(x = group, y = c.center.lag, fill = condition)
) +
  geom_violin() +
  geom_boxplot(width = .2, position = dodge) +
  labs(title = "/sp/, c-center", y = "Lag") +
  ylim(80, 350)
## Warning: Removed 158 rows containing non-finite values (stat_ydensity).
## Warning: Removed 158 rows containing non-finite values (stat_boxplot).

# /sp/ rightedge lag by group, filled by condition: violins with boxplots placed via `dodge`.
# ylim() restricts the y scale to 80-350; values outside that range are dropped.
ggplot(
  df.C.center[df.C.center$cluster == 'sp', ],
  aes(x = group, y = rightedge.lag, fill = condition)
) +
  geom_violin() +
  geom_boxplot(width = .2, position = dodge) +
  labs(title = "/sp/, rightedge", y = "lag") +
  ylim(80, 350)
## Warning: Removed 101 rows containing non-finite values (stat_ydensity).
## Warning: Removed 101 rows containing non-finite values (stat_boxplot).

# /sp/ leftedge lag by group, filled by condition: violins with boxplots placed via `dodge`.
# ylim() restricts the y scale to 80-350; values outside that range are dropped.
ggplot(
  df.C.center[df.C.center$cluster == 'sp', ],
  aes(x = group, y = leftedge.lag, fill = condition)
) +
  geom_violin() +
  geom_boxplot(width = .2, position = dodge) +
  labs(title = "/sp/, leftedge", y = "lag") +
  ylim(80, 350)
## Warning: Removed 230 rows containing non-finite values (stat_ydensity).
## Warning: Removed 230 rows containing non-finite values (stat_boxplot).

/sx/

# /sx/ c-center lag by group, filled by condition: violins with boxplots placed via `dodge`.
# ylim() restricts the y scale to 80-350; values outside that range are dropped.
ggplot(
  df.C.center[df.C.center$cluster == 'sx', ],
  aes(x = group, y = c.center.lag, fill = condition)
) +
  geom_violin() +
  geom_boxplot(width = .2, position = dodge) +
  labs(title = "/sx/, c-center", y = "lag") +
  ylim(80, 350)
## Warning: Removed 575 rows containing non-finite values (stat_ydensity).
## Warning: Removed 575 rows containing non-finite values (stat_boxplot).

# /sx/ rightedge lag by group, filled by condition: violins with boxplots placed via `dodge`.
# ylim() restricts the y scale to 80-350; values outside that range are dropped.
ggplot(
  df.C.center[df.C.center$cluster == 'sx', ],
  aes(x = group, y = rightedge.lag, fill = condition)
) +
  geom_violin() +
  geom_boxplot(width = .2, position = dodge) +
  labs(title = "/sx/, rightedge", y = "lag") +
  ylim(80, 350)
## Warning: Removed 531 rows containing non-finite values (stat_ydensity).
## Warning: Removed 531 rows containing non-finite values (stat_boxplot).

# /sx/ leftedge lag by group, filled by condition: violins with boxplots placed via `dodge`.
# ylim() restricts the y scale to 80-350; values outside that range are dropped.
ggplot(
  df.C.center[df.C.center$cluster == 'sx', ],
  aes(x = group, y = leftedge.lag, fill = condition)
) +
  geom_violin() +
  geom_boxplot(width = .2, position = dodge) +
  labs(title = "/sx/, leftedge", y = "lag") +
  ylim(80, 350)
## Warning: Removed 463 rows containing non-finite values (stat_ydensity).
## Warning: Removed 463 rows containing non-finite values (stat_boxplot).

/spr/

# /spr/ c-center lag by group, filled by condition: violins with boxplots placed via `dodge`.
# ylim() restricts the y scale to 80-350; values outside that range are dropped.
ggplot(
  df.C.center[df.C.center$cluster == 'spr', ],
  aes(x = group, y = c.center.lag, fill = condition)
) +
  geom_violin() +
  geom_boxplot(width = .2, position = dodge) +
  labs(title = "/spr/, c-center", y = "lag") +
  ylim(80, 350)
## Warning: Removed 1138 rows containing non-finite values (stat_ydensity).
## Warning: Removed 1138 rows containing non-finite values (stat_boxplot).

# /spr/ rightedge lag by group, filled by condition: violins with boxplots placed via `dodge`.
# ylim() restricts the y scale to 80-350; values outside that range are dropped.
ggplot(
  df.C.center[df.C.center$cluster == 'spr', ],
  aes(x = group, y = rightedge.lag, fill = condition)
) +
  geom_violin() +
  geom_boxplot(width = .2, position = dodge) +
  labs(title = "/spr/, rightedge", y = "lag") +
  ylim(80, 350)
## Warning: Removed 1415 rows containing non-finite values (stat_ydensity).
## Warning: Removed 1415 rows containing non-finite values (stat_boxplot).

# /spr/ leftedge lag by group, filled by condition: violins with boxplots placed via `dodge`.
# ylim() restricts the y scale to 80-350; values outside that range are dropped.
ggplot(
  df.C.center[df.C.center$cluster == 'spr', ],
  aes(x = group, y = leftedge.lag, fill = condition)
) +
  geom_violin() +
  geom_boxplot(width = .2, position = dodge) +
  labs(title = "/spr/, leftedge", y = "lag") +
  ylim(80, 350)
## Warning: Removed 924 rows containing non-finite values (stat_ydensity).
## Warning: Removed 924 rows containing non-finite values (stat_boxplot).

6.8.2 Per individual

/sm/

# /sm/ c-center lag per CTRL subject, filled by condition (boxplots).
# ylim() restricts the y scale to 80-350; values outside that range are dropped.
ggplot(
  df.C.center[df.C.center$cluster == 'sm' & df.C.center$group == 'CTRL', ],
  aes(x = subj, y = c.center.lag, fill = condition)
) +
  geom_boxplot(width = .5) +
  labs(title = "/sm/, c-center, CTRL", y = "lag") +
  ylim(80, 350)
## Warning: Removed 59 rows containing non-finite values (stat_boxplot).

# /sm/ rightedge lag per CTRL subject, filled by condition (boxplots).
# ylim() restricts the y scale to 80-350; values outside that range are dropped.
ggplot(
  df.C.center[df.C.center$cluster == 'sm' & df.C.center$group == 'CTRL', ],
  aes(x = subj, y = rightedge.lag, fill = condition)
) +
  geom_boxplot(width = .5) +
  labs(title = "/sm/, rightedge, CTRL", y = "lag") +
  ylim(80, 350)
## Warning: Removed 50 rows containing non-finite values (stat_boxplot).

# /sm/ c-center lag per PD subject, filled by condition (boxplots).
# ylim() restricts the y scale to 80-350; values outside that range are dropped.
ggplot(
  df.C.center[df.C.center$cluster == 'sm' & df.C.center$group == 'PD', ],
  aes(x = subj, y = c.center.lag, fill = condition)
) +
  geom_boxplot(width = .5) +
  labs(title = "/sm/, c-center, PD", y = "lag") +
  ylim(80, 350)
## Warning: Removed 132 rows containing non-finite values (stat_boxplot).

# /sm/ rightedge lag per PD subject, filled by condition (boxplots).
# ylim() restricts the y scale to 80-350; values outside that range are dropped.
ggplot(
  df.C.center[df.C.center$cluster == 'sm' & df.C.center$group == 'PD', ],
  aes(x = subj, y = rightedge.lag, fill = condition)
) +
  geom_boxplot(width = .5) +
  labs(title = "/sm/, rightedge, PD", y = "lag") +
  ylim(80, 350)
## Warning: Removed 108 rows containing non-finite values (stat_boxplot).

/sp/

# /sp/ c-center lag per CTRL subject, filled by condition (boxplots).
# ylim() restricts the y scale to 80-350; values outside that range are dropped.
ggplot(
  df.C.center[df.C.center$cluster == 'sp' & df.C.center$group == 'CTRL', ],
  aes(x = subj, y = c.center.lag, fill = condition)
) +
  geom_boxplot(width = .5) +
  labs(title = "/sp/, c-center, CTRL", y = "lag") +
  ylim(80, 350)
## Warning: Removed 88 rows containing non-finite values (stat_boxplot).

# /sp/ rightedge lag per CTRL subject, filled by condition (boxplots).
# ylim() restricts the y scale to 80-350; values outside that range are dropped.
ggplot(
  df.C.center[df.C.center$cluster == 'sp' & df.C.center$group == 'CTRL', ],
  aes(x = subj, y = rightedge.lag, fill = condition)
) +
  geom_boxplot(width = .5) +
  labs(title = "/sp/, rightedge, CTRL", y = "lag") +
  ylim(80, 350)
## Warning: Removed 59 rows containing non-finite values (stat_boxplot).

# /sp/ c-center lag per PD subject, filled by condition (boxplots).
# ylim() restricts the y scale to 80-350; values outside that range are dropped.
ggplot(
  df.C.center[df.C.center$cluster == 'sp' & df.C.center$group == 'PD', ],
  aes(x = subj, y = c.center.lag, fill = condition)
) +
  geom_boxplot(width = .5) +
  labs(title = "/sp/, c-center, PD", y = "lag") +
  ylim(80, 350)
## Warning: Removed 70 rows containing non-finite values (stat_boxplot).

# /sp/ rightedge lag per PD subject, filled by condition (boxplots).
# ylim() restricts the y scale to 80-350; values outside that range are dropped.
ggplot(
  df.C.center[df.C.center$cluster == 'sp' & df.C.center$group == 'PD', ],
  aes(x = subj, y = rightedge.lag, fill = condition)
) +
  geom_boxplot(width = .5) +
  labs(title = "/sp/, rightedge, PD", y = "lag") +
  ylim(80, 350)
## Warning: Removed 42 rows containing non-finite values (stat_boxplot).

/sx/

# /sx/ c-center lag per CTRL subject, filled by condition (boxplots).
# ylim() restricts the y scale to 80-350; values outside that range are dropped.
ggplot(
  df.C.center[df.C.center$cluster == 'sx' & df.C.center$group == 'CTRL', ],
  aes(x = subj, y = c.center.lag, fill = condition)
) +
  geom_boxplot(width = .5) +
  labs(title = "/sx/, c-center, CTRL", y = "lag") +
  ylim(80, 350)
## Warning: Removed 297 rows containing non-finite values (stat_boxplot).

# /sx/ rightedge lag per CTRL subject, filled by condition (boxplots).
# ylim() restricts the y scale to 80-350; values outside that range are dropped.
ggplot(
  df.C.center[df.C.center$cluster == 'sx' & df.C.center$group == 'CTRL', ],
  aes(x = subj, y = rightedge.lag, fill = condition)
) +
  geom_boxplot(width = .5) +
  labs(title = "/sx/, rightedge, CTRL", y = "lag") +
  ylim(80, 350)
## Warning: Removed 278 rows containing non-finite values (stat_boxplot).

# /sx/ c-center lag per PD subject, filled by condition (boxplots).
# ylim() restricts the y scale to 80-350; values outside that range are dropped.
ggplot(
  df.C.center[df.C.center$cluster == 'sx' & df.C.center$group == 'PD', ],
  aes(x = subj, y = c.center.lag, fill = condition)
) +
  geom_boxplot(width = .5) +
  labs(title = "/sx/, c-center, PD", y = "lag") +
  ylim(80, 350)
## Warning: Removed 278 rows containing non-finite values (stat_boxplot).

# /sx/ rightedge lag per PD subject, filled by condition (boxplots).
# ylim() restricts the y scale to 80-350; values outside that range are dropped.
ggplot(
  df.C.center[df.C.center$cluster == 'sx' & df.C.center$group == 'PD', ],
  aes(x = subj, y = rightedge.lag, fill = condition)
) +
  geom_boxplot(width = .5) +
  labs(title = "/sx/, rightedge, PD", y = "lag") +
  ylim(80, 350)
## Warning: Removed 253 rows containing non-finite values (stat_boxplot).

/spr/

# /spr/ c-center lag per CTRL subject, filled by condition (boxplots).
# ylim() restricts the y scale to 80-350; values outside that range are dropped.
ggplot(
  df.C.center[df.C.center$cluster == 'spr' & df.C.center$group == 'CTRL', ],
  aes(x = subj, y = c.center.lag, fill = condition)
) +
  geom_boxplot(width = .5) +
  labs(title = "/spr/, c-center, CTRL", y = "lag") +
  ylim(80, 350)
## Warning: Removed 546 rows containing non-finite values (stat_boxplot).

# /spr/ rightedge lag per CTRL subject, filled by condition (boxplots).
# ylim() restricts the y scale to 80-350; values outside that range are dropped.
ggplot(
  df.C.center[df.C.center$cluster == 'spr' & df.C.center$group == 'CTRL', ],
  aes(x = subj, y = rightedge.lag, fill = condition)
) +
  geom_boxplot(width = .5) +
  labs(title = "/spr/, rightedge, CTRL", y = "lag") +
  ylim(80, 350)
## Warning: Removed 702 rows containing non-finite values (stat_boxplot).

# /spr/ c-center lag per PD subject, filled by condition (boxplots).
# ylim() restricts the y scale to 80-350; values outside that range are dropped.
ggplot(
  df.C.center[df.C.center$cluster == 'spr' & df.C.center$group == 'PD', ],
  aes(x = subj, y = c.center.lag, fill = condition)
) +
  geom_boxplot(width = .5) +
  labs(title = "/spr/, c-center, PD", y = "lag") +
  ylim(80, 350)
## Warning: Removed 592 rows containing non-finite values (stat_boxplot).

# /spr/ rightedge lag per PD subject, filled by condition (boxplots).
# ylim() restricts the y scale to 80-350; values outside that range are dropped.
ggplot(
  df.C.center[df.C.center$cluster == 'spr' & df.C.center$group == 'PD', ],
  aes(x = subj, y = rightedge.lag, fill = condition)
) +
  geom_boxplot(width = .5) +
  labs(title = "/spr/, rightedge, PD", y = "lag") +
  ylim(80, 350)
## Warning: Removed 713 rows containing non-finite values (stat_boxplot).

7 Save data

# Add the subject metadata to both modelling tables and save them.

# Directory that holds metadata.csv and receives the .Rda files. Paths are
# built explicitly with file.path() so the working directory is left untouched.
analysis.dir <- "/Users/45598770/Documents/analysis"

df_meta <- read.csv(file.path(analysis.dir, "metadata.csv"), header = TRUE, sep = ",")
df_meta$gender <- as.factor(df_meta$gender)
# merge() keeps only subjects present in both tables (all = FALSE by default).
df.rightedge <- merge(df.rightedge, df_meta, by = "subj")
df.CC.lag <- merge(df.CC.lag, df_meta, by = "subj")
rm(df_meta)

# Set the reference levels ("M", "CTRL", "Rest"; per the original note, the
# levels with the most data points). as.factor() leaves factors unchanged and
# converts character columns, which relevel() would otherwise reject.
df.rightedge$gender <- relevel(as.factor(df.rightedge$gender), ref = "M")
df.rightedge$group <- relevel(as.factor(df.rightedge$group), ref = "CTRL")
df.rightedge$dialect <- relevel(as.factor(df.rightedge$dialect), ref = "Rest")

# Same reference levels for the CC-lag table.
df.CC.lag$gender <- relevel(as.factor(df.CC.lag$gender), ref = "M")
df.CC.lag$group <- relevel(as.factor(df.CC.lag$group), ref = "CTRL")
df.CC.lag$dialect <- relevel(as.factor(df.CC.lag$dialect), ref = "Rest")

save(df.rightedge, file = file.path(analysis.dir, "modelling_data_rightedge.Rda"))
save(df.CC.lag, file = file.path(analysis.dir, "modelling_data_CC.Rda"))